import scrapy

from ScrapyObject.spiders.utils.url_utils import *

'''
联通
scrapy crawl langyounine -o langyounine.json
https://thd1.mzsbxq.com/main/
'''


class LangyounineSpider(scrapy.Spider):
    """Spider for the ``mzsbxq`` site, starting at its ``main/`` page.

    For every response, ``parse`` emits an item for a video URL found in the
    page body (if any), one item per listing entry found via XPath, and a
    follow-up request for each site-relative ``.html`` link found in the page.

    The helpers ``get_data``, ``get_video_url_three``, ``get_video_item``,
    ``format_url_one``, ``split_joint`` and ``get_url`` come from the
    ``url_utils`` star import; their exact semantics are not visible here.
    """

    # URL prefix (scheme plus sub-domain).
    prefix = 'https://thd1.'
    # Middle part of the URL (the site name).
    website = 'mzsbxq'
    # URL suffix (top-level domain plus trailing slash).
    suffix = '.com/'
    name = 'langyounine'
    allowed_domains = [website + '.com']
    start_urls = [prefix + website + suffix + "main/"]

    def __init__(self, *args, **kwargs):
        """Initialise the spider and its running item counter.

        Arguments are forwarded to ``scrapy.Spider.__init__`` so that spider
        arguments supplied by Scrapy (e.g. ``-a key=value``) are accepted
        instead of raising ``TypeError``.
        """
        super(LangyounineSpider, self).__init__(*args, **kwargs)
        # Running counter used as the ``id`` of every emitted item.
        self.i = 0

    @staticmethod
    def _background_url(style):
        """Return the text between the first ``(`` and the next ``)`` in *style*.

        Returns an empty string when *style* contains no ``(``, rather than
        raising ``IndexError`` and aborting the whole page.
        """
        pieces = style.split('(')
        if len(pieces) < 2:
            return ''
        return pieces[1].split(')')[0]

    def parse(self, response):
        """Yield video items and follow-up requests extracted from *response*.

        Yields, in order:
          * one item carrying the page's video URL, when one is found;
          * one item per listing entry, when the picture, link, title and tag
            lists all have the same length (otherwise none, with a warning);
          * one ``scrapy.Request`` per link that starts with ``/`` and ends
            with ``.html``, handled again by this method.
        """
        base_url = self.prefix + self.website + self.suffix

        # Page content as a string (per the helper's name -- its
        # implementation is not visible here).
        content = get_data(response)
        video_url = get_video_url_three(content)
        if video_url:
            self.i += 1
            yield get_video_item(id=self.i, tags='', url=response.url, name='', pUrl="",
                                 vUrl=format_url_one(video_url[0]))

        # The listing anchors share one XPath; only the attribute differs.
        anchor_xpath = "//li[@class='col-md-2 col-sm-3 col-xs-4 ']//a[@class='video-pic loading']"
        pic_list = response.xpath(anchor_xpath + "/@ style").extract()
        url_list = response.xpath(anchor_xpath + "/@ href").extract()
        names = response.xpath(anchor_xpath + "/@ title").extract()
        tags = response.xpath(
            "//li[@class='col-md-2 col-sm-3 col-xs-4 ']//table//tr//td//div[@align='right']/text()").extract()

        # The four lists are paired by position, so they are only usable when
        # every entry produced all four values.
        if len(pic_list) == len(url_list) == len(names) == len(tags):
            for style, href, title, tag in zip(pic_list, url_list, names, tags):
                self.i += 1
                yield get_video_item(id=self.i, tags=tag, url=split_joint(base_url, href),
                                     name=title, pUrl=self._background_url(style))
        else:
            self.logger.warning(
                "Skipping listing on %s: mismatched lengths (pics=%d, urls=%d, names=%d, tags=%d)",
                response.url, len(pic_list), len(url_list), len(names), len(tags))

        # Follow every site-relative .html link found in the page content.
        for url in get_url(content):
            if url.startswith('/') and url.endswith('.html'):
                yield scrapy.Request(split_joint(base_url, url), callback=self.parse)
